library(tidyverse)
library(lubridate)
library(janitor)
library(here)
library(hrbrthemes)
library(kableExtra)
library(DT)
library(skimr)
library(outbreaks)
library(incidence2)
library(epicontacts)
library(epitrix)
theme_set(theme_minimal())
measles_data <- measles_hagelloch_1861 %>%
  tibble()
skim(measles_data)
Data summary
Name measles_data
Number of rows 188
Number of columns 12
_______________________
Column type frequency:
Date 3
factor 3
numeric 6
________________________
Group variables None

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date_of_prodrome 0 1.00 1861-10-30 1862-01-24 1861-12-01 36
date_of_rash 0 1.00 1861-11-03 1862-01-27 1861-12-05 35
date_of_death 176 0.06 1861-11-18 1861-12-28 1861-12-13 8

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
gender 11 0.94 FALSE 2 m: 94, f: 83
class 0 1.00 FALSE 3 0: 90, 2: 68, 1: 30
complications 0 1.00 FALSE 1 yes: 188

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
case_ID 0 1.00 94.50 54.42 1.0 47.75 94.5 141.25 188 ▇▇▇▇▇
infector 4 0.98 82.15 64.20 1.0 31.00 50.5 153.00 188 ▇▇▃▂▇
age 0 1.00 6.99 4.30 0.0 3.00 7.0 11.00 15 ▇▆▃▇▃
family_ID 0 1.00 30.95 16.59 1.0 18.00 31.0 43.00 69 ▅▇▇▆▂
x_loc 0 1.00 188.22 59.09 7.5 145.00 182.5 240.00 280 ▁▂▇▆▇
y_loc 0 1.00 133.78 67.04 5.0 80.00 147.5 187.50 240 ▃▃▃▇▅

Building an Epicurve

Incidence

raw_incidence <- incidence(
  measles_data,
  date_of_prodrome,
  interval = "week",
  groups = c(gender, class)
  )

facet_plot(
  raw_incidence,
  fill = gender,
  facets = class,
  show_cases = TRUE
  ) +
  scale_fill_viridis_d(na.value = "black") +
  labs(
    title = "Epicurve of the Hagelloch 1861 measles outbreak",
    subtitle = "Stratified by class and gender",
    x = "Date of onset",
    y = "Number of cases (daily)"
  )
## Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.

Cumulative Incidence

raw_incidence %>%
  cumulate() %>%
  facet_plot(
    fill = gender,
    facets = class
    # show_cases = TRUE
  ) +
  scale_fill_viridis_d(na.value = "black") +
  labs(
    title = "Cumulative Epicurve of the Hagelloch 1861 measles outbreak",
    subtitle = "Stratified by class",
    x = "Date of onset",
    y = "Number of cases (daily)"
  )
## Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.

Estimating delays

measles_data <- measles_data %>%
  mutate(delay_rash = as.integer(date_of_rash - date_of_prodrome))
ggplot(measles_data, aes(x = delay_rash)) +
  geom_bar(fill = "#d15656") +
  labs(
    title = "Empirical distribution of delays from onset to rash",
    x = "Days from onset of symptoms to rash",
    y = "Number of cases"
    )

delay_rash_fit <- measles_data %>%
  pull(delay_rash) %>%
  epitrix::fit_disc_gamma()

delay_rash_fit
## $mu
## [1] 4.446486
## 
## $cv
## [1] 0.3785482
## 
## $sd
## [1] 1.68321
## 
## $ll
## [1] -360.0598
## 
## $converged
## [1] TRUE
## 
## $distribution
## A discrete distribution
##   name: gamma
##   parameters:
##     shape: 6.97842734893307
##     scale: 0.637176003492939
delay_rash_comparison <- measles_data %>%
  count(delay_rash) %>%
  complete(delay_rash = 0:15, fill = list(n = 0)) %>%
  mutate(
    prop = n / sum(n),
    prop_dist = delay_rash_fit$distribution$d(delay_rash)
    )
ggplot(delay_rash_comparison, aes(x = delay_rash)) +
  geom_col(aes(y = prop), fill = "#d15656") +
  geom_line(aes(y = prop_dist)) +
  labs(
    title = "Empirical vs Esimated distribution of delays from onset to rash",
    x = "Days from onset of symptoms to rash",
    y = "Proportion/probability"
    )

Estimating severity

measles_data <- measles_data %>%
  mutate(outcome = if_else(is.na(date_of_death), "recovered", "dead"))
tabyl(measles_data$outcome)
##  measles_data$outcome   n    percent
##                  dead  12 0.06382979
##             recovered 176 0.93617021

Visualising transmission chains

measles_contacts <- measles_data %>%
  select(infector, case_ID)
measles_chains <- make_epicontacts(
  linelist = measles_data,
  contacts = measles_contacts,
  id = "case_ID",
  from = "infector",
  to = "case_ID",
  directed = TRUE
)
measles_chains
## 
## /// Epidemiological Contacts //
## 
##   // class: epicontacts
##   // 188 cases in linelist; 188 contacts;  directed 
## 
##   // linelist
## 
## tibble [188, 14] 
## id               int  1 2 3 4 5 6
## infector         int  45 45 172 180 45 180
## date_of_prodrome date 1861-11-21 1861-11-23 1861-11-28 1861-11-27 1861-11-22 1861-11-26
## date_of_rash     date 1861-11-25 1861-11-27 1861-12-02 1861-11-28 1861-11-27 1861-11-29
## date_of_death    date NA NA NA NA NA NA
## age              dbl  7 6 4 13 8 12
## gender           fct  f f f m f m
## family_ID        int  41 41 41 61 42 42
## class            fct  1 1 0 2 1 2
## complications    fct  yes yes yes yes yes yes
## x_loc            dbl  142.5 142.5 142.5 165 145 145
## y_loc            dbl  100 100 100 102.5 120 120
## delay_rash       int  4 4 4 1 5 3
## outcome          chr  recovered recovered recovered recovered recovered recovered 
## 
##   // contacts
## 
## tibble [188, 2] 
## from int 45 45 172 180 45 180
## to   int 1 2 3 4 5 6
plot(measles_chains)
all_ids  <- measles_chains %>%
  get_id(which = "all") %>%
  na.omit()
measles_data %>%
  count(infector) %>%
  complete(
    infector = all_ids,
    fill = list(n = 0)
  ) %>%
  ggplot(aes(x = n)) +
  geom_bar(fill = "#18415a") +
  labs(
    title = "Distribution of case reproduction numbers",
    x = "Number of secondary cases",
    y = "Frequency"
  )

measles_chains %>%
  vis_epicontacts(
    node_color = "family_ID",
    node_shape = "gender",
    shapes = c("m" = "male", "f" = "female")
    )

Estimating the serial interval

measles_data <- measles_data %>%
  mutate(
    serial_interval = as.integer(date_of_prodrome - date_of_prodrome[infector])
  )

# Could also use the following code to estimate the serial interval
get_pairwise(measles_chains, "date_of_prodrome")
##   [1] 10 12  9 10 11  9  9 10 11 10 10  9 10  9 13  8  7 11  7  9 10 10 10  9  9 11  9  8  9 14 10 10 13  8 10 10 10  9 12 11 10  8 11 11 12 10 13 10  8 11 10  9 10 12 11 11  9 11  8 10  9  7  9 10 10 12  7 12 11 11 10 11 10 10  8 11 10  9  9  8  8 11  9
##  [84]  8 10 12  8  8 12 10 11 12 11  9 13  9 10 10  9  9 10 10  8 12 14  8 10 11 15 11 10 10 13  9 10 10 11 11 10 12 11 12  9 11 13 10 14  9 11  8 13 11 12  9  9  9 10 10 10  9 NA 12 12 13 12 12 10 11 12  8 13 12 13  9 12 11 12 12 14 12  9  9 10 10 12 10
## [167] 10  8 10 13 11 11 NA NA 11 10  8  8 11 10 14 16 12 NA 11 11 15 11
serial_interval_fit <- measles_data %>%
  pull(serial_interval) %>%
  epitrix::fit_disc_gamma()
## Warning in epitrix::fit_disc_gamma(.): 4 NAs were removed from the data before fitting.
serial_interval_fit
## $mu
## [1] 10.89177
## 
## $cv
## [1] 0.1500888
## 
## $sd
## [1] 1.634733
## 
## $ll
## [1] -352.9525
## 
## $converged
## [1] TRUE
## 
## $distribution
## A discrete distribution
##   name: gamma
##   parameters:
##     shape: 44.3918886018654
##     scale: 0.245355063017598
serial_interval_comparison <- measles_data %>%
  count(serial_interval) %>%
  complete(serial_interval = 0:20, fill = list(n = 0)) %>%
  mutate(
    prop = n / sum(n),
    prop_dist = serial_interval_fit$distribution$d(serial_interval)
    )
ggplot(serial_interval_comparison, aes(x = serial_interval)) +
  geom_col(aes(y = prop), fill = "#2b4885") +
  geom_line(aes(y = prop_dist)) +
  labs(
    title = "Empirical vs Esimated distribution of delays from onset to rash",
    x = "Days from onset of symptoms to rash",
    y = "Proportion/probability"
    )
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 row(s) containing missing values (geom_path).

Transmission chains: further investigations

get_pairwise(measles_chains, "gender") %>%
  tabyl()
##       .  n   percent valid_percent
##  f -> f 37 0.1968085     0.2228916
##  f -> m 38 0.2021277     0.2289157
##  m -> f 40 0.2127660     0.2409639
##  m -> m 51 0.2712766     0.3072289
##    <NA> 22 0.1170213            NA
get_pairwise(measles_chains, "gender", f = table) %>%
  chisq.test()
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 0.28626, df = 1, p-value = 0.5926
get_pairwise(measles_chains, "class", f = table) %>%
  as_tibble() %>%
  rename(infector = "values.from", infectee = "values.to") %>%
  ggplot(aes(x = infectee, y = infector)) +
  geom_tile(aes(fill = n)) +
  scale_fill_viridis_c() +
  labs(title = "Tranmission patterns by class")

get_pairwise(measles_chains, "age", f = table) %>%
  as_tibble() %>%
  rename(infector = "values.from", infectee = "values.to") %>%
  ggplot(aes(x = infectee, y = infector)) +
  geom_tile(aes(fill = n)) +
  scale_fill_viridis_c() +
  labs(title = "Tranmission patterns by age")

# Indicate if person infected by someone in their family
prop_same <- function(a, b) {
  mean(a == b, na.rm = TRUE)
}

# Indicate if person infected by someone in their family
prop_same_perm <- function(a, b) {
  perm_a <- sample(a, replace = FALSE)
  mean(perm_a == b, na.rm = TRUE)
}

prop_same_fam <- measles_chains %>%
  get_pairwise(attribute = "family_ID",
               f = prop_same)

prop_same_fam_ref <- purrr::map_dbl(
  1:999,
  function(.x) {
    get_pairwise(measles_chains,
                 attribute = "family_ID",
                 f = prop_same_perm)
  }
)

prop_same_fam / mean(prop_same_fam_ref)
## [1] 21.88421
## [1] 21.39722

qplot(prop_same_fam_ref, geom = "histogram") +
  theme_bw() +
  geom_vline(xintercept = prop_same_fam, color = "#E03F57", size = 1) +
  labs(x = "Proportion of transmission within the same family",
       y = "Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.